RRPlot and the Colon data set

Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
#source("~/GitHub/FRESA.CAD/R/PoissonEventRiskCalibration.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

The data set

data(cancer)
colon <- subset(colon,etype==1)
colon$etype <- NULL
rownames(colon) <- colon$id
colon$id <- NULL
colon <- colon[complete.cases(colon),]
time <- colon$time
status <- colon$status
data <- colon
data$time <- NULL
data$study <- NULL
table(data$status)

0 1 442 446

dataColon <- as.data.frame(model.matrix(status~.*.,data))
dataColon$`(Intercept)` <- NULL
dataColon$time <- time/365
dataColon$status <- status
colnames(dataColon) <-str_replace_all(colnames(dataColon),":","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\.","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\+","_")
data <- NULL

trainsamples <- sample(nrow(dataColon),0.7*nrow(dataColon))
dataColonTrain <- dataColon[trainsamples,]
dataColonTest <- dataColon[-trainsamples,]


pander::pander(table(dataColonTrain$status))
0 1
312 309
pander::pander(table(dataColonTest$status))
0 1
130 137

Modeling

ml <- BSWiMS.model(Surv(time,status)~1,data=dataColonTrain,NumberofRepeats = 3)

[++++++-+++++-++++++-].

sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy
age_nodes 0.000429 1.000 1.000 1.001 0.618
differ_node4 0.093433 1.062 1.098 1.135 0.607
extent_node4 0.027126 1.017 1.027 1.038 0.607
nodes_node4 0.006979 1.004 1.007 1.010 0.609
rxLev_5FU_age -0.002420 0.997 0.998 0.999 0.576
node4 0.076526 1.042 1.080 1.118 0.607
age_node4 0.001210 1.001 1.001 1.002 0.607
rxLev_5FU -0.077728 0.889 0.925 0.963 0.576
rxLev_5FU_differ -0.039137 0.942 0.962 0.981 0.576
rxLev_5FU_sex -0.065512 0.904 0.937 0.970 0.554
extent 0.139213 1.066 1.149 1.239 0.549
rxLev_5FU_extent -0.015696 0.976 0.984 0.993 0.576
differ_extent 0.016119 1.006 1.016 1.027 0.559
age -0.003365 0.994 0.997 0.999 0.519
sex_nodes -0.000831 0.999 0.999 1.000 0.519
nodes 0.018653 1.005 1.019 1.033 0.620
nodes_extent -0.002573 0.996 0.997 0.999 0.630
nodes_differ 0.001999 1.000 1.002 1.004 0.614
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
age_nodes 0.528 0.633 0.618 0.528 0.632
differ_node4 0.613 0.624 0.606 0.613 0.624
extent_node4 0.614 0.606 0.606 0.614 0.605
nodes_node4 0.582 0.600 0.607 0.582 0.599
rxLev_5FU_age 0.614 0.624 0.577 0.613 0.624
node4 0.636 0.632 0.606 0.635 0.631
age_node4 0.629 0.631 0.606 0.629 0.631
rxLev_5FU 0.605 0.629 0.577 0.604 0.628
rxLev_5FU_differ 0.602 0.629 0.577 0.601 0.629
rxLev_5FU_sex 0.605 0.606 0.556 0.604 0.605
extent 0.617 0.624 0.551 0.616 0.624
rxLev_5FU_extent 0.604 0.600 0.577 0.603 0.599
differ_extent 0.618 0.631 0.560 0.617 0.631
age 0.618 0.638 0.518 0.618 0.637
sex_nodes 0.607 0.583 0.518 0.606 0.583
nodes 0.617 0.631 0.619 0.617 0.631
nodes_extent 0.620 0.637 0.629 0.619 0.637
nodes_differ 0.603 0.620 0.613 0.601 0.619
  IDI NRI z.IDI z.NRI Delta.AUC Frequency
age_nodes 0.03923 0.4574 6.70 6.211 0.104234 1.000
differ_node4 0.03564 0.4006 5.57 5.996 0.010851 1.000
extent_node4 0.02918 0.4043 5.29 6.046 -0.009489 1.000
nodes_node4 0.02749 0.2718 5.16 4.208 0.016559 1.000
rxLev_5FU_age 0.02394 0.3074 4.56 4.140 0.011224 1.000
node4 0.01439 0.3803 4.18 5.740 -0.004203 1.000
age_node4 0.01274 0.3668 4.12 5.552 0.001962 1.000
rxLev_5FU 0.01763 0.3093 3.82 4.168 0.024370 1.000
rxLev_5FU_differ 0.01708 0.3071 3.77 4.134 0.027948 1.000
rxLev_5FU_sex 0.01755 0.2208 3.68 3.868 0.000999 1.000
extent 0.01546 0.1846 3.63 3.430 0.007870 1.000
rxLev_5FU_extent 0.01453 0.3093 3.41 4.168 -0.004244 1.000
differ_extent 0.01099 0.2862 3.00 3.614 0.014133 1.000
age 0.00893 0.1375 2.82 1.726 0.019635 0.667
sex_nodes 0.00487 0.0287 2.74 0.358 -0.023089 0.333
nodes 0.00493 0.2133 2.68 2.749 0.013979 1.000
nodes_extent 0.00481 0.1454 2.68 1.873 0.018238 1.000
nodes_differ 0.00379 0.1888 2.40 2.459 0.017968 1.000

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

index <- predict(ml,dataColonTrain)
timeinterval <- round(2*mean(subset(dataColonTrain,status==1)$time),0)

h0 <- sum(dataColonTrain$status & dataColonTrain$time <= timeinterval)
h0 <- h0/sum((dataColonTrain$time > timeinterval) | (dataColonTrain$status==1))

rdata <- cbind(dataColonTrain$status,ppoisGzero(index,h0))

rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90),
                     timetoEvent=dataColonTrain$time,
                     title="Raw Train: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Time to event

toinclude <- rdata[,1]==1 
obstiemToEvent <- dataColonTrain[,"time"]
tmin<-min(obstiemToEvent)
sum(toinclude)

[1] 309

timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
tmax<-max(c(obstiemToEvent,timetoEvent))
lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
  Estimate Std. Error t value Pr(>|t|)
timetoEvent[toinclude] 0.453 0.0232 19.5 9.29e-56
Fitting linear model: obstiemToEvent[toinclude] ~ 0 + timetoEvent[toinclude]
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
309 1.42 0.553 0.551
plot(timetoEvent,obstiemToEvent,
     col=1+rdata[,1],
     xlab="Expected Time",
     ylab="Observed Time",
     main="Expected vs. Observed",
     xlim=c(tmin,tmax),
     ylim=c(tmin,tmax),
     log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
             pch=c(1,1,-1),
             col=c(1,2,"blue"),
             lty=c(-1,-1,1)
             )

MADerror2 <- mean(abs(timetoEvent[toinclude]-obstiemToEvent[toinclude]))
pander::pander(MADerror2)

2.04

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.453 0.345 0.238 0.18616 0.499
RR 1.782 1.816 2.550 1.00000 1.891
RR_LCI 1.548 1.543 1.531 0.00000 1.654
RR_UCI 2.050 2.138 4.246 0.00000 2.163
SEN 0.327 0.595 0.961 1.00000 0.285
SPE 0.897 0.699 0.147 0.00321 0.936
BACC 0.612 0.647 0.554 0.50160 0.610
NetBenefit 0.120 0.216 0.345 0.38304 0.110
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.96 0.856 1.07 0.486
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.41 1.41 1.38 1.43
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.35 1.35 1.35 1.35
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.653 0.652 0.623 0.682
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.687 0.646 0.728
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.327 0.275 0.382
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.897 0.858 0.929
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.452
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 78.107277 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 488 208 263 11.5 78.1
class=1 133 101 46 65.7 78.1

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataColonTrain,"status","time",timeInterval=timeinterval)

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 TimeInterval=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain TimeInterval
0.674 1.53 3.2

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(dataColonTrain$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90),
                     timetoEvent=dataColonTrain$time,
                     title="Calibrated Train: Colon",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Time to event after calibration

timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
tmax<-max(c(obstiemToEvent,timetoEvent))
lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
  Estimate Std. Error t value Pr(>|t|)
timetoEvent[toinclude] 0.649 0.0333 19.5 9.29e-56
Fitting linear model: obstiemToEvent[toinclude] ~ 0 + timetoEvent[toinclude]
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
309 1.42 0.553 0.551
plot(timetoEvent,obstiemToEvent,
     col=1+rdata[,1],
     xlab="Expected time",
     ylab="Observed time",
     main="Expected vs. Observed",
     xlim=c(tmin,tmax),
     ylim=c(tmin,tmax),
     log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
             pch=c(1,1,-1),
             col=c(1,2,"blue"),
             lty=c(-1,-1,1)
             )

MADerror2 <- c(MADerror2,mean(abs(timetoEvent[toinclude]-obstiemToEvent[toinclude])))
pander::pander(MADerror2)

2.04 and 1.36

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6029 0.477 0.340 0.27052 0.499
RR 1.7817 1.816 2.550 1.00000 1.696
RR_LCI 1.5481 1.543 1.531 0.00000 1.462
RR_UCI 2.0505 2.138 4.246 0.00000 1.968
SEN 0.3269 0.595 0.961 1.00000 0.450
SPE 0.8974 0.699 0.147 0.00321 0.798
BACC 0.6121 0.647 0.554 0.50160 0.624
NetBenefit 0.0844 0.158 0.258 0.31189 0.123
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.777 0.693 0.869 4.38e-06
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
0.994 0.994 0.979 1.01
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.04 1.04 1.04 1.04
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.653 0.653 0.621 0.683
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.687 0.646 0.728
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.327 0.275 0.382
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.897 0.858 0.929
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.602
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 78.107277 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 488 208 263 11.5 78.1
class=1 133 101 46 65.7 78.1

Evaluating on the test set

The calibrated h0 and timeinterval were estimated on the training set

index <- predict(ml,dataColonTest)
rdata <- cbind(dataColonTest$status,ppoisGzero(index,h0))

rrAnalysisTest <- RRPlot(rdata,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=dataColonTest$time,
                     title="Test: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.602 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6017 0.490 0.338 2.82e-01 0.500
RR 1.4813 1.778 4.210 1.56e+01 1.582
RR_LCI 1.1834 1.409 1.457 3.51e-02 1.264
RR_UCI 1.8541 2.244 12.169 6.91e+03 1.980
SEN 0.3139 0.555 0.978 1.00e+00 0.474
SPE 0.8462 0.738 0.154 2.31e-02 0.754
BACC 0.5800 0.647 0.566 5.12e-01 0.614
NetBenefit 0.0479 0.162 0.291 3.26e-01 0.123
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.876 0.736 1.04 0.128
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.23 1.23 1.21 1.26
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.02 1.02 1.02 1.03
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.663 0.662 0.62 0.706
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.679 0.615 0.743
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.307 0.231 0.391
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.846 0.772 0.903
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.602
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 18.259279 on 1 degrees of freedom, p = 0.000019
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 205 95 113.7 3.08 18.3
class=1 62 42 23.3 15.06 18.3

Cross-Validation

Here we will cross validate the training set and evaluate also on the testing set. The h0 and the timeinterval are the ones estimated on the calibration process

rcv <- randomCV(theData=dataColonTrain,
                theOutcome = Surv(time,status)~1,
                fittingFunction=BSWiMS.model, 
                trainFraction = 0.75,
                repetitions=50,
                classSamplingType = "Pro",
                testingSet=dataColonTest
         )

.[+++-+-].[+++++++++++-]..[+++–].[++++++-].[++++++-].[+++++-+-].[++++++++–].[++++++++-].[+++++++-].[++++++++++-].10 Tested: 859 Avg. Selected: 17.7 Min Tests: 1 Max Tests: 10 Mean Tests: 4.924331 . MAD: 0.4709705

.[++++++++-].[+++++++-].[+++++++-].[+++++-].[++++++++++++++++]..[++++-].[+++++-].[++++++++-+-].[++++++++++-]..[++++++-]20 Tested: 886 Avg. Selected: 17.35 Min Tests: 1 Max Tests: 20 Mean Tests: 9.548533 . MAD: 0.4721706

.[+++++++++-].[+++++++-].[+++++++-].[+++++++-].[+++++-++-].[+++++++++++++]..[+++++++-].[++++++-].[+++++++-++–].[++++++++–]30 Tested: 888 Avg. Selected: 17.56667 Min Tests: 2 Max Tests: 30 Mean Tests: 14.29054 . MAD: 0.4727218

.[++++++++-].[+++++++++-].[++++++++-].[++++++++-].[++++++-].[++++++++-].[++++++++-].[++++++-].[++++++-].[++++++-]40 Tested: 888 Avg. Selected: 17.175 Min Tests: 2 Max Tests: 40 Mean Tests: 19.05405 . MAD: 0.4726585

.[+++++++-].[+++++++++-].[+++++-].[++++++-].[++++-++-].[++-+-].[++++++++-].[+++++++-].[++++++++-].[+++++-]50 Tested: 888 Avg. Selected: 17.2 Min Tests: 6 Max Tests: 50 Mean Tests: 23.81757 . MAD: 0.4725459

stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]

bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)

rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names

rrAnalysisCVTest <- RRPlot(rdatacv,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=times,
                     title="CV Test: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

CV Test Performance

pander::pander(t(rrAnalysisCVTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.602 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6030 0.459 0.354 0.268 0.4998
RR 1.6384 1.714 2.479 1.000 1.5245
RR_LCI 1.4518 1.492 1.598 0.000 1.3465
RR_UCI 1.8491 1.968 3.847 0.000 1.7260
SEN 0.3184 0.619 0.964 1.000 0.3857
SPE 0.8756 0.647 0.133 0.000 0.8032
BACC 0.5970 0.633 0.549 0.500 0.5944
NetBenefit 0.0658 0.162 0.248 0.320 0.0958
pander::pander(t(rrAnalysisCVTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.795 0.723 0.872 5.02e-07
pander::pander(t(rrAnalysisCVTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.07 1.07 1.06 1.08
pander::pander(t(rrAnalysisCVTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.04 1.04 1.03 1.04
pander::pander(rrAnalysisCVTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.649 0.649 0.625 0.673
pander::pander(t(rrAnalysisCVTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.671 0.635 0.706
pander::pander((rrAnalysisCVTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.318 0.275 0.364
pander::pander((rrAnalysisCVTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.876 0.841 0.905
pander::pander(t(rrAnalysisCVTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.602
pander::pander(rrAnalysisCVTest$surdif,caption="Logrank test")
Logrank test Chisq = 86.318718 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 691 304 375.3 13.6 86.3
class=1 197 142 70.7 72.0 86.3